package com.atguigu.bigdata.spark.core.rdd.operator.transform;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import java.util.List;

/**
 * Spark {@code filter} operator exercise.
 *
 * <p>Reads {@code datas/apache.log}, keeps the lines whose fourth space-separated field
 * starts with {@code 17/05/2015}, collects them to the driver and prints each one to
 * standard output.
 */
public class Spark07_RDD_Operator_Transform_Test_JAVA {
    /** Path of the log file handed to {@code textFile}. */
    private static final String LOG_PATH = "datas/apache.log";

    /** Prefix the checked field must start with for a line to be kept. */
    private static final String TARGET_DATE = "17/05/2015";

    /** Zero-based index of the space-separated field compared against {@link #TARGET_DATE}. */
    private static final int TIMESTAMP_FIELD = 3;

    /**
     * Runs the job on a local master using all available cores.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("sparkCore");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // try/finally so the context is stopped even when the job itself fails.
        try {
            // Task: from the server log apache.log, get the requests made on 17 May 2015.
            // NOTE(review): the task statement asks for the request *path*, but this job
            // prints the whole matching line (unchanged from the original) - confirm intent.
            JavaRDD<String> rdd = sc.textFile(LOG_PATH);

            JavaRDD<String> filterRDD = rdd.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) throws Exception {
                    return hasTargetDate(s);
                }
            });

            List<String> res = filterRDD.collect();
            for (String val : res) {
                System.out.println(val);
            }
        } finally {
            sc.stop();
        }
    }

    /**
     * Tells whether a log line belongs to {@link #TARGET_DATE}.
     *
     * <p>Lines with too few space-separated fields (for example blank lines) are treated as
     * non-matching instead of raising {@link ArrayIndexOutOfBoundsException}, which would
     * otherwise fail the whole Spark job.
     *
     * @param line one raw line of the log file
     * @return {@code true} if the field at {@link #TIMESTAMP_FIELD} exists and starts with
     *         {@link #TARGET_DATE}; {@code false} otherwise
     */
    static boolean hasTargetDate(String line) {
        String[] fields = line.split(" ");
        return fields.length > TIMESTAMP_FIELD
                && fields[TIMESTAMP_FIELD].startsWith(TARGET_DATE);
    }
}
